Q1

library(dplyr)

Attaching package: ‘dplyr’

The following objects are masked from ‘package:stats’:

    filter, lag

The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union
# Load the txhousing data set and work on a copy named `df`.
data("txhousing")
df <- txhousing

# List the column names of the data set.
variable_names <- colnames(df)
print(variable_names)
[1] "city"      "year"      "month"     "sales"     "volume"    "median"    "listings"  "inventory"
[9] "date"     
# Add the product of the sales and median columns as a new column.
df[["New_Var"]] <- df[["sales"]] * df[["median"]]

# Count the distinct values in the city column.
num_cities <- length(unique(df[["city"]]))
print(num_cities)
[1] 46

There are 46 unique cities

# Count the rows whose sales value is NA.
missing_sales <- sum(is.na(df[["sales"]]))
print(missing_sales)
[1] 568

There are 568 missing values in the sales column.

# Keep only the rows for 2010 (rows where the comparison is NA are dropped).
df_2010 <- df[which(df$year == 2010), ]

# Mean of sales for each city, with the result column renamed.
avg_sales_2010 <- aggregate(sales ~ city, data = df_2010, FUN = mean)
colnames(avg_sales_2010)[2] <- "average_sales"
print(avg_sales_2010)

Q2

# Total of the sales column for each year
sales_per_year <- aggregate(sales ~ year, data = df, FUN = sum)

# Line chart of the yearly totals
ggplot(data = sales_per_year, mapping = aes(x = year, y = sales)) +
  geom_line(color = "blue") +
  labs(
    title = "Total number of sales in different years",
    x = "Years",
    y = "Total Number of Sales"
  ) +
  theme_minimal()

Q3

\[ y = \beta_0 + \beta_1 x_1 + \beta_2 x_2 + \beta_3 x_3 + u \]

# Filter for Victoria: keep only the rows whose city equals "Victoria"
victoria_data <- df[df$city == "Victoria", ]

# Linear regression of log(sales) on listings, year and median, fitted on
# the Victoria rows only; summary() prints the coefficient table.
lin.model <- lm(log(sales) ~ listings + year + median, data = victoria_data)
summary(lin.model)

Call:
lm(formula = log(sales) ~ listings + year + median, data = victoria_data)

Residuals:
     Min       1Q   Median       3Q      Max 
-0.58623 -0.12597  0.01247  0.15680  0.63317 

Coefficients:
              Estimate Std. Error t value Pr(>|t|)    
(Intercept)  4.265e+01  1.870e+01   2.280 0.023753 *  
listings    -2.942e-04  2.628e-04  -1.120 0.264379    
year        -1.946e-02  9.405e-03  -2.069 0.039967 *  
median       6.108e-06  1.559e-06   3.917 0.000127 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.2275 on 181 degrees of freedom
  (2 observations deleted due to missingness)
Multiple R-squared:  0.1546,    Adjusted R-squared:  0.1406 
F-statistic: 11.03 on 3 and 181 DF,  p-value: 1.093e-06

Median is the most significant predictor (p < 0.001). Year and the intercept are significant at the 5% level. Listings is not significant (p = 0.264).

Q4

One problem with the code is that it uses a single equals sign where a comparison is intended; a comparison needs the double equals sign (`==`). In addition, the pipe operator it uses is the newer one, so an older version of R may not support it; the other pipe operator is the safer choice.

Q5

library(plotly)
Loading required package: ggplot2

Attaching package: ‘ggplot2’

The following object is masked _by_ ‘.GlobalEnv’:

    economics_long


Attaching package: ‘plotly’

The following object is masked from ‘package:ggplot2’:

    last_plot

The following object is masked from ‘package:stats’:

    filter

The following object is masked from ‘package:graphics’:

    layout

aplease

library(maps)
library(ggplot2)
library(dplyr)

Attaching package: ‘dplyr’

The following objects are masked from ‘package:stats’:

    filter, lag

The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union
# Load the canada.cities data set (the maps package is attached above).
data("canada.cities")

# Keep the rows whose country.etc value is not "Canada".
# NOTE(review): country.etc in canada.cities appears to hold province
# abbreviations rather than "Canada", so this filter may keep every row --
# confirm against the data and the question being answered.
canada_cities_filtered <- canada.cities %>%
  filter(country.etc != "Canada")

# Scatter plot of the city coordinates, coloured by country.etc.
first_graph <- ggplot(
  canada_cities_filtered,
  aes(x = long, y = lat, color = country.etc)
) +
  geom_point() +
  labs(
    title = "Cities in North America (Excluding Canada)",
    x = "Longitude",
    y = "Latitude",
    color = "Country"
  ) +
  theme_minimal()

# Convert ggplot to an interactive plotly plot
interactive_plot <- ggplotly(first_graph)

# Auto-print the widget as the last top-level expression so the notebook
# embeds it; the explicit print() call produced only NULL in the output.
interactive_plot
NULL

Q6

# Install titanic only when it is missing, so re-running the notebook does
# not download and reinstall the package every time.
if (!requireNamespace("titanic", quietly = TRUE)) {
  install.packages("titanic")
}
trying URL 'https://cran.rstudio.com/bin/macosx/big-sur-arm64/contrib/4.4/titanic_0.1.0.tgz'
Content type 'application/x-gzip' length 88521 bytes (86 KB)
==================================================
downloaded 86 KB

The downloaded binary packages are in
    /var/folders/bq/dkdbc7qs7g1d203tqt73k9300000gn/T//RtmpvxYC7V/downloaded_packages
library(titanic)

Attaching package: ‘titanic’

The following object is masked _by_ ‘.GlobalEnv’:

    titanic_train
# Reload the training data, then convert the model columns to numeric.
data("titanic_train")
titanic_train[c("Pclass", "Survived", "Parch")] <- lapply(
  titanic_train[c("Pclass", "Survived", "Parch")],
  as.numeric
)

# Encode Sex as 1 for "male" and 0 otherwise.
titanic_train$Sex <- as.numeric(titanic_train$Sex == "male")

# Logistic regression of Survived on Pclass, Sex, Age and Parch
model <- glm(Survived ~ Pclass + Sex + Age + Parch, data = titanic_train, family = binomial)

# Print the coefficient table and fit statistics
summary(model)

Call:
glm(formula = Survived ~ Pclass + Sex + Age + Parch, family = binomial, 
    data = titanic_train)

Coefficients:
             Estimate Std. Error z value Pr(>|z|)    
(Intercept)  5.200300   0.516675  10.065  < 2e-16 ***
Pclass      -1.287049   0.139186  -9.247  < 2e-16 ***
Sex         -2.585173   0.214296 -12.064  < 2e-16 ***
Age         -0.038228   0.007747  -4.934 8.04e-07 ***
Parch       -0.147168   0.115579  -1.273    0.203    
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 964.52  on 713  degrees of freedom
Residual deviance: 645.60  on 709  degrees of freedom
  (177 observations deleted due to missingness)
AIC: 655.6

Number of Fisher Scoring iterations: 5

Estimated Coefficients: (Intercept): 5.200300, Pclass: -1.287049, Sex (1 = male): -2.585173, Age: -0.038228, Parch: -0.147168

Pclass, Sex, and Age are significant. Parch is not significant, since its p-value is 0.203.

Q7

library(tidyr)
# Fetch the long-format economics data from ggplot2.
data("economics_long", package = "ggplot2")

# Remove the value01 column, keeping the result under the same name.
economics_long <- select(economics_long, -value01)

# Reshape long to wide: one column per date, filled from value.
econW <- pivot_wider(economics_long, names_from = date, values_from = value)

# Report the number of rows and columns of the wide data frame.
print(dim(econW))
[1]   5 575

There are 5 rows and 575 columns

LS0tCnRpdGxlOiAiRmluYWwgRXhhbSAtIEt1bndhciBOaXIgMTAxMjU3NDI4IgpkYXRlOiAnYHIgU3lzLkRhdGUoKWAnCm91dHB1dDogCiAgaHRtbF9ub3RlYm9vazogCiAgICB0b2M6IHllcwogICAgdG9jX2Zsb2F0OiB5ZXMKICAgIGRmX3ByaW50OiBwYWdlZAplZGl0b3Jfb3B0aW9uczogCiAgbWFya2Rvd246IAogICAgd3JhcDogNzIKLS0tCgojIFExCgpgYGB7ciBlY2hvPVRSVUUsIG1lc3NhZ2U9RkFMU0UsIHdhcm5pbmc9RkFMU0UsIHBhZ2VkLnByaW50PVRSVUV9CgppbnN0YWxsLnBhY2thZ2VzKCJnZ3Bsb3QyIikKaW5zdGFsbC5wYWNrYWdlcygiZHBseXIiKQpsaWJyYXJ5KGdncGxvdDIpCmxpYnJhcnkoZHBseXIpCmBgYAoKYGBge3IgZWNobz1UUlVFLCBtZXNzYWdlPUZBTFNFLCB3YXJuaW5nPUZBTFNFLCBwYWdlZC5wcmludD1UUlVFfQpkYXRhKCJ0eGhvdXNpbmciKQpkZiA8LSB0eGhvdXNpbmcKCnZhcmlhYmxlX25hbWVzIDwtIG5hbWVzKGRmKQpwcmludCh2YXJpYWJsZV9uYW1lcykKYGBgCgpgYGB7ciBlY2hvPVRSVUUsIG1lc3NhZ2U9RkFMU0UsIHdhcm5pbmc9RkFMU0UsIHBhZ2VkLnByaW50PVRSVUV9CmRmJE5ld19WYXIgPC0gZGYkc2FsZXMgKiBkZiRtZWRpYW4KCm51bV9jaXRpZXMgPC0gbGVuZ3RoKHVuaXF1ZShkZiRjaXR5KSkKcHJpbnQobnVtX2NpdGllcykKYGBgClRoZXJlIGFyZSA0NiB1bmlxdWUgY2l0aWVzCmBgYHtyIGVjaG89VFJVRSwgbWVzc2FnZT1GQUxTRSwgd2FybmluZz1GQUxTRSwgcGFnZWQucHJpbnQ9VFJVRX0KbWlzc2luZ19zYWxlcyA8LSBzdW0oaXMubmEoZGYkc2FsZXMpKQpwcmludChtaXNzaW5nX3NhbGVzKQpgYGAKVGhlcmUgYXJlIDU2OCBtaXNzaW5nIHNhbGVzCmBgYHtyIGVjaG89VFJVRSwgbWVzc2FnZT1GQUxTRSwgd2FybmluZz1GQUxTRSwgcGFnZWQucHJpbnQ9VFJVRX0KZGZfMjAxMCA8LSBzdWJzZXQoZGYsIHllYXIgPT0gMjAxMCkKYXZnX3NhbGVzXzIwMTAgPC0gYWdncmVnYXRlKHNhbGVzIH4gY2l0eSwgZGF0YSA9IGRmXzIwMTAsIG1lYW4pCm5hbWVzKGF2Z19zYWxlc18yMDEwKVsyXSA8LSAiYXZlcmFnZV9zYWxlcyIKcHJpbnQoYXZnX3NhbGVzXzIwMTApCmBgYAogIAoKIyBRMgoKYGBge3IgZWNobz1UUlVFLCBtZXNzYWdlPUZBTFNFLCB3YXJuaW5nPUZBTFNFLCBwYWdlZC5wcmludD1UUlVFfQojIFN1bSBvZiBzYWxlcyBwZXIgeWVhcgpzYWxlc19wZXJfeWVhciA8LSBhZ2dyZWdhdGUoc2FsZXMgfiB5ZWFyLCBkYXRhID0gZGYsIHN1bSkKCiMgQ3JlYXRlIGEgbGluZSBncmFwaApnZ3Bsb3Qoc2FsZXNfcGVyX3llYXIsIGFlcyh4ID0geWVhciwgeSA9IHNhbGVzKSkgKwogIGdlb21fbGluZShjb2xvciA9ICJibHVlIikgKwogIGdndGl0bGUoIlRvdGFsIG51bWJlciBvZiBzYWxlcyBpbiBkaWZmZXJlbnQgeWVhcnMiKSArIAogIHhsYWIoIlllYXJzIikgKyAKICB5bGFiKCJUb3RhbCBOdW1iZXIgb2YgU2FsZXMiKSArCiAgdGhlbWVfbWluaW1hbCgpCmBgYAoKIyMgUTMKCiAkJAp5
ID0gXGJldGFfMCArXGJldGFfMSAqIHgxICtcYmV0YV8yICogeDIgK1xiZXRhXzMgKiB4MyArIHUKJCQKCmBgYHtyIGxpbmVhciByZWd9CnZpY3RvcmlhX2RhdGEgPC0gZGZbZGYkY2l0eSA9PSAiVmljdG9yaWEiLCBdCgpsaW4ubW9kZWwgPC0gbG0obG9nKHNhbGVzKSB+IGxpc3RpbmdzICsgeWVhciArIG1lZGlhbiwgZGF0YSA9IHZpY3RvcmlhX2RhdGEpCnN1bW1hcnkobGluLm1vZGVsKQpgYGAKTWVkaWFuIGlzIHRoZSBtb3N0IHNpZ25pZmljYW50LCBZZWFyIGFuZCBpbnRlcmNlcHQgYXJlIGEgbGl0dGxlIHNpZ25pZmljYW50LiBsaXN0aW5nIGlzIG5vdCBzaWduaWZpY2FudC4gCgoKIyMgUTQKCk9uZSBwcm9ibGVtIHdpdGggdGhlIGNvZGUgaXMgdGhhdCBpdCBpcyB1c2luZyBhIHNpbmdsZSBlcXVhbHMsIGl0IG5lZWRzIHRvIHVzZSBhIGRvdWJsZSBlcXVhbHMgc2lnbiBmb3IgY29tcGFyaXNvbi4gVGhlIHBpcGUgb3BlcmF0b3IgdXNlZCBpcyBuZXcgYW5kIHlvdSBtYXkgbm90IGhhdmUgYSBuZXcgZW5vdWdoIHZlcnNpb24gb2YgUiB0byB1c2UgaXQsIHRoZSBvdGhlciBvbmUgaXMgc2FmZXIuIAoKIyMgUTUKCmBgYHtyIGVjaG89VFJVRSwgbWVzc2FnZT1GQUxTRSwgd2FybmluZz1GQUxTRSwgcGFnZWQucHJpbnQ9VFJVRX0KaW5zdGFsbC5wYWNrYWdlcygibWFwcyIsIGRlcGVuZGVuY2llcyA9IFRSVUUpCmluc3RhbGwucGFja2FnZXMoInBsb3RseSIpCgpsaWJyYXJ5KHBsb3RseSkKYGBgICAKYXBsZWFzZQpgYGB7ciBlY2hvPVRSVUUsIG1lc3NhZ2U9RkFMU0UsIHdhcm5pbmc9RkFMU0UsIHBhZ2VkLnByaW50PVRSVUV9CmxpYnJhcnkobWFwcykKbGlicmFyeShnZ3Bsb3QyKQpsaWJyYXJ5KGRwbHlyKQoKZGF0YSgiY2FuYWRhLmNpdGllcyIpCgpjYW5hZGFfY2l0aWVzX2ZpbHRlcmVkIDwtIGNhbmFkYS5jaXRpZXMgJT4lIAogIGZpbHRlcihjb3VudHJ5LmV0YyAhPSAiQ2FuYWRhIikKCmZpcnN0X2dyYXBoIDwtIGdncGxvdChjYW5hZGFfY2l0aWVzX2ZpbHRlcmVkLCBhZXMoeCA9IGxvbmcsIHkgPSBsYXQsIGNvbG9yID0gY291bnRyeS5ldGMpKSArCiAgZ2VvbV9wb2ludCgpICsKICBsYWJzKHRpdGxlID0gIkNpdGllcyBpbiBOb3J0aCBBbWVyaWNhIChFeGNsdWRpbmcgQ2FuYWRhKSIsCiAgICAgICB4ID0gIkxvbmdpdHVkZSIsCiAgICAgICB5ID0gIkxhdGl0dWRlIiwKICAgICAgIGNvbG9yID0gIkNvdW50cnkiKSArCiAgdGhlbWVfbWluaW1hbCgpCgojIENvbnZlcnQgZ2dwbG90IHRvIGFuIGludGVyYWN0aXZlIHBsb3RseSBwbG90CmludGVyYWN0aXZlX3Bsb3QgPC0gZ2dwbG90bHkoZmlyc3RfZ3JhcGgpCgojIFByaW50IHRoZSBpbnRlcmFjdGl2ZSBwbG90CnByaW50KGludGVyYWN0aXZlX3Bsb3QpCmBgYCAgIAoKCiMjIFE2CgpgYGB7ciBlY2hvPVRSVUUsIG1lc3NhZ2U9RkFMU0UsIHdhcm5pbmc9RkFMU0UsIHBhZ2VkLnByaW50PVRSVUV9Cmluc3RhbGwucGFja2FnZXMoInRpdGFuaWMiKQpsaWJyYXJ5KHRpdGFuaWMpCgpkYXRh
KCJ0aXRhbmljX3RyYWluIikKdGl0YW5pY190cmFpbiRQY2xhc3MgPC0gYXMubnVtZXJpYyh0aXRhbmljX3RyYWluJFBjbGFzcykKdGl0YW5pY190cmFpbiRTZXggPC0gaWZlbHNlKHRpdGFuaWNfdHJhaW4kU2V4ID09ICJtYWxlIiwgMSwgMCkKdGl0YW5pY190cmFpbiRTdXJ2aXZlZCA8LSBhcy5udW1lcmljKHRpdGFuaWNfdHJhaW4kU3Vydml2ZWQpCnRpdGFuaWNfdHJhaW4kUGFyY2ggPC0gYXMubnVtZXJpYyh0aXRhbmljX3RyYWluJFBhcmNoKQoKIyBGaXQgdGhlIGxvZ2lzdGljIHJlZ3Jlc3Npb24gbW9kZWwKbW9kZWwgPC0gZ2xtKFN1cnZpdmVkIH4gUGNsYXNzICsgU2V4ICsgQWdlICsgUGFyY2gsIGRhdGEgPSB0aXRhbmljX3RyYWluLCBmYW1pbHkgPSBiaW5vbWlhbCkKCiMgRGlzcGxheSB0aGUgc3VtbWFyeSBvZiB0aGUgbW9kZWwKc3VtbWFyeShtb2RlbCkKYGBgCgoKRXN0aW1hdGVkIENvZWZmaWNpZW50czoKKEludGVyY2VwdCk6IDUuMjAwMzAwClBjbGFzczogLTEuMjg3MDQ5ClNleG1hbGU6IC0yLjU4NTE3MwpBZ2U6IC0wLjAzODIyOApQYXJjaDogLTAuMTQ3MTY4CgoKUGNsYXNzLCBzZXgsIGFnZSBhcmUgc2lnbmlmaWNhbnQuIFBhcmNoIGlzIG5vdCBzaWduaWZpY2FudCBzaW5jZSBpdHMgUCB2YWx1ZSBpcyAwLjIwMy4gCgoKIyMgUTcKYGBge3IgZWNobz1UUlVFLCBtZXNzYWdlPUZBTFNFLCB3YXJuaW5nPUZBTFNFLCBwYWdlZC5wcmludD1UUlVFfQpsaWJyYXJ5KHRpZHlyKQoKZGF0YSgiZWNvbm9taWNzX2xvbmciLCBwYWNrYWdlID0gImdncGxvdDIiKQoKCmVjb25vbWljc19sb25nIDwtIGVjb25vbWljc19sb25nICU+JSAKICBzZWxlY3QoLXZhbHVlMDEpCgplY29uVyA8LSBlY29ub21pY3NfbG9uZyAlPiUKICBwaXZvdF93aWRlcihuYW1lc19mcm9tID0gZGF0ZSwgdmFsdWVzX2Zyb20gPSB2YWx1ZSkKCnByaW50KGRpbShlY29uVykpCmBgYAoKVGhlcmUgYXJlIDUgcm93cyBhbmQgNTc1IGNvbHVtbnMKCgo=